package cn.udrm.elasticsearch.fileread;

import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.File;
import java.io.IOException;

/**
 * Reads the text content of PDF files.
 *
 * @author xurongbei
 */
public class PDFReadContent {

    /**
     * Loads the given PDF file and returns the text extracted from it by a
     * default-configured {@link PDFTextStripper}.
     *
     * <p>The document is always closed before this method returns, whether
     * extraction succeeds or fails.
     *
     * @param file the PDF file to read
     * @return the text extracted from the document
     * @throws IOException if the file cannot be loaded or its text cannot be extracted
     */
    public static String getPdfContent(File file) throws IOException {
        // try-with-resources guarantees the document (and the file handle it
        // holds) is released even when text extraction throws.
        try (PDDocument document = PDDocument.load(file)) {
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText(document);
        }
    }
}
